###   R code to import data for 
###   'When should the majority rule? Experimental 
###     evidence for Madisonian judgments in five cultures'
###   by Bor, Mazepus, Bokemper and DeScioli
################################### #
###   This code will import and pool the data from the 
###   Hungary, Denmark, India, USA, Russia studies

###   Code by Alexander Bor
#####################################
###   Code written with R version 3.6.2

# install.packages("rio")
# install.packages("tidyverse")
# install.packages("forcats")
# install.packages("here")

# Load packages 
library(rio)
library(tidyverse)
library(forcats)
library(here)

# Load raw data
# Input paths are built with here(), the same way the output paths are built
# in the save step of this script, so reading and writing resolve against
# the same project root regardless of the current working directory.

hun_r1 <- import(here("A. original data", "Minority_Hungary_p1.csv"))  # Hungary round 1
hun_r2 <- import(here("A. original data", "Minority_Hungary_p2.csv"))  # Hungary round 2
ind <- import(here("A. original data", "Minority_India.csv"))          # India
rus <- import(here("A. original data", "Minority_Russia.csv"))         # Russia
usa <- import(here("A. original data", "Minority_USA.csv"))            # USA
den <- import(here("A. original data", "Minority_Denmark.csv"))        # Denmark

# The open-ended "Age" answers in the US, Danish and Russian files contain
# stray characters. Strip them and coerce to integer so the column has the
# same type in every file and the files can be merged.
# NOTE(review): the character class keeps "+" as well as digits, so an entry
# such as "65+" is still turned into NA by as.integer() -- confirm intended.

usa$Age <- as.integer(gsub("[^0-9+]", replacement = "", x = usa$Age))
den$Age <- as.integer(gsub("[^0-9+]", replacement = "", x = den$Age))
rus$Age <- as.integer(gsub("[^0-9+]", replacement = "", x = rus$Age))

# The appropriateness DVs in Denmark are mislabelled (_5, _6, _7).
# Relabel them to _2, _3, _4 so they conform to the other country files.
# gsub() replaces the pattern wherever it occurs in a column name.
names(den) <- gsub(
        "_7", "_4",
        gsub(
                "_6", "_3",
                gsub("_5", "_2", names(den))
        )
)

# Due to a randomization error, half of the Danish sample is dropped
# (vulnerable minority was manipulated within subject).
# A row is flagged (mix = 1) when neither the row sum of the control columns
# nor the row sum of the treatment columns equals 2; only rows with mix == 0
# are kept.
# NOTE(review): the coding of these six columns is not visible in this
# script -- confirm that "sum equals 2" identifies the rows to keep.

controls <- c("DinC", "ActC", "ComC")
treatments <- c("DinT", "ActT", "ComT")
den$mix <- ifelse(
        rowSums(den[, controls], na.rm = TRUE) != 2 &
                rowSums(den[, treatments], na.rm = TRUE) != 2,
        1, 0
)

den <- den %>% filter(mix == 0)

# Stack the country files into one data frame. The list names are written to
# the new "country" column; both Hungarian rounds share the label "Hungary".

df_raw <- bind_rows(
        list(
                "Hungary" = hun_r1,
                "Hungary" = hun_r2,
                "India" = ind,
                "USA" = usa,
                "Russia" = rus,
                "Denmark" = den
        ),
        .id = "country"
)

# create clean long data file
# Steps: keep DVs and covariates -> stack all DV columns into long format ->
# decode the DV column names -> recode covariates -> spread the DVs back into
# one column per measure -> label / recentre the DVs -> drop rows without a
# comprehension score.
df_all <- df_raw %>% 
        dplyr::select(
                matches(".Din"), matches(".Act"), matches(".Com"),  # these grab all DVs
                Ideo, Sex, Age, ResponseID, contains("Aut"), country, # demographic vars
                Comp, Party
        ) %>% 
        # pool all DVs. The DV columns come first because of the select() order;
        # 1:30 assumes exactly 30 of them were matched -- TODO confirm if the
        # raw files change.
        gather(variables, values, 1:30, na.rm = TRUE) %>%
        # extract info coded in DV names: characters 1-3 -> DV, 4-6 -> scenario,
        # 7 -> condition, remainder -> q_number.
        separate(variables, c("DV", "scenario", "condition", "q_number"),  
                 c(3, 6, 7)) %>% 
        # transmute() keeps only the columns created below.
        transmute(ID = ResponseID, 
                  # forced choice DV had no numbers. replace with "Dec"
                  DV = ifelse(DV == "Dec", DV, q_number), 
                  # create nice labels for DVs
                  DV = fct_recode(as.factor(DV), 
                                  decision = "Dec", 
                                  app_leader = "_1", 
                                  app_consensus = "_2", 
                                  app_vote = "_3", 
                                  app_chance = "_4"),
                  # nice labels for scenarios (within subject)
                  scenario = fct_recode(as.factor(scenario), 
                                        activity = "Act", 
                                        dinner = "Din", 
                                        company = "Com"), 
                  # conditions (btw subjects)
                  condition = fct_recode(as.factor(condition),
                                         treatment = "T", 
                                         control = "C"), 
                  
                  values = values, 
                  # Danish Ideo: 12 is set to NA; the other values are rescaled
                  # linearly so that 1 stays 1 and 11 becomes 7. Other
                  # countries are kept as they are.
                  ideo = ifelse(country == "Denmark" & Ideo == 12, NA, 
                                ifelse(country == "Denmark",
                                       ((Ideo - 1) / 10 * 6) + 1,
                                       Ideo)),
                  # keep ages strictly between 16 and 99; everything else is NA
                  age = ifelse(Age < 99 & Age > 16, Age, NA),
                  sex = Sex,
                  comprehension = ifelse(Comp > 5, Comp-5, Comp),  # Danish responses by accident coded 6-10
                  # authoritarianism =   ifelse(Aut1 == 2, 0, Aut1) +
                  #                       ifelse(Aut2 == 2, 0, Aut2) + 
                  #                        (Aut3 - 1) + (Aut4 - 1),
                  # country-specific party codes -> party labels; any code not
                  # listed (including missing) falls through to "Other_None"
                  Party = case_when(
                          country == "Hungary" & Party == 1 ~ "Fidesz",
                          country == "Hungary" & Party == 3 ~ "MSZP",
                          country == "Hungary" & Party == 4 ~ "Jobbik",
                          country == "Hungary" & Party == 5 ~ "DK",
                          country == "Hungary" & Party == 6 ~ "LMP",
                          country == "USA" & Party == 1 ~ "Democratic",
                          country == "USA" & Party == 4 ~ "Republican",
                          country == "India" & Party == 1 ~ "NDA",
                          country == "India" & Party == 4 ~ "UPA",
                          country == "India" & Party == 5 ~ "Janata_Parivar",
                          country == "Denmark" & Party == 1 ~ "SocDem",
                          country == "Denmark" & Party == 2 ~ "Radikale",
                          country == "Denmark" & Party == 3 ~ "Konservative",
                          country == "Denmark" & Party == 4 ~ "SF",
                          country == "Denmark" & Party == 5 ~ "LA",
                          country == "Denmark" & Party == 7 ~ "DanskFolkeparti",
                          country == "Denmark" & Party == 8 ~ "Venstre",
                          country == "Denmark" & Party == 9 ~ "Enhedlisten",
                          country == "Denmark" & Party == 10 ~ "NyeBorgerlige",
                          country == "Russia" & Party == 1 ~ "KPRF",
                          country == "Russia" & Party == 4 ~ "ER",
                          country == "Russia" & Party == 5 ~ "LDPR",
                          country == "Russia" & Party == 6 ~ "Yabloko",
                          country == "Russia" & Party == 8 ~ "SR",
                          TRUE ~ "Other_None"),
                  country = country,
                  country_scenario = paste(country, scenario, sep = "_")) %>% 
        # unpool DVs: one column per DV label again.
        spread(DV, values) %>% 
        # labels for forced choice DV
        mutate(decision = fct_recode(as.factor(decision), 
                                     leader = "1", 
                                     consensus = "2", 
                                     vote = "3", 
                                     chance = "4"), 
               # move these levels to the front, so "vote" is the first level
               decision = fct_relevel(decision, "vote", "consensus", 
                                      "leader"), 
               # shift the appropriateness ratings down by 4
               # (presumably centring the response scale -- confirm)
               app_leader = app_leader - 4,
               app_consensus = app_consensus - 4,
               app_vote = app_vote - 4,
               app_chance = app_chance - 4,
               # "dinner" becomes the first scenario level, then "activity"
               scenario = fct_relevel(scenario, "dinner", "activity")
        ) %>% 
        # exclude those who didn't get to the end
        filter(!is.na(comprehension))

# Label every row with its country and scenario, joined by "_".
df_all[["country_scenario"]] <- paste(
        df_all[["country"]], df_all[["scenario"]],
        sep = "_"
)



# Analysis sample: keep only rows whose comprehension score equals 5.
# which() skips missing scores, so those rows are excluded as well.
df <- df_all[which(df_all[["comprehension"]] == 5), ]


# Write the results to the "C. analysis data" folder under the project root:
# df_all as .Rdata, and the comprehension-filtered df as .Rdata and .csv.
save(list = "df_all", file = here("C. analysis data", "minority_all_data.Rdata"))
save(list = "df", file = here("C. analysis data", "minority_data.Rdata"))
write.csv(x = df, file = here("C. analysis data", "minority_data.csv"))
